##############################################################################
# File-Name: 05_heterogenous_effects_tables.R
# Purpose: Main script to add tables for attrition analysis 
# status: ongoing
# Machine: MacOS High Sierra
##############################################################################


# Packages ----------------------------------------------------------------
pacman::p_load(tidyverse, scales,  knitr, srvyr, extrafont, 
               rebus, broom, tidyr, lubridate, here, ggdist, ggtext, ggalt, janitor, 
               broom, tidyr,  stargazer, janitor, estimatr, list)


source(here("code","_utils.r"))


# Data wrangling ----------------------------------------------------------

# open
d <- readRDS(here("data","all_processed_data.rds"))


# Functions ---------------------------------------------------------------

# In this code I  need to rerun the functions to modelling because the inputs are different
# the inputs of the functions are the dependent variable and the full model. 
# the purpose of this is because I need to change covariates to run the interactive effects. 

# ITT: simple ols
func_ols_base= function(depvar, mod, data, model=FALSE, int=FALSE){
    
    if(model==TRUE){
        model.expr = as.formula(paste(depvar, " ~ exp"))
        out = lm_robust(model.expr, data = data, se_type = "stata")
        return(out)}
    
    if(int==TRUE){
        model.expr = as.formula(paste(depvar, " ~ exp*", mod))
        out = lm_robust(model.expr, data = data, se_type = "stata")
        return(out)}
    
    model.expr = as.formula(paste(depvar, " ~ exp"))
    out = lm_robust(model.expr, data = data, se_type = "stata")
    
    # sd control
    sd_control <- data %>% 
        filter(exp=="control") %>%
        pull(depvar) %>%
        sd(., na.rm=TRUE)
    
    
    # tidy
    out <- out %>% 
        tidy() %>%
        filter(term=="exptreatment") %>%
        mutate_at(vars(c(estimate, std.error)), 
                  ~.x/sd_control)
    return(out)
    
}

# Cov-ITT: ols + controls
func_ols_covariates = function(depvar, mod, data, model=FALSE, int=FALSE){
    # For the controls, we will use
    # respondents’ demographics (sex, age, education, income, race), measures of trust in mainstream media
    # and institutions, pre-treatment measures of polarization, ideology and social media usage.
    add.vars = c(           
        #demographics
        "w1_q_gender" ,  
        "q_race" ,  
        "w1_q_education" ,
        "w1_q_age_num",
        "income_num",
        
        
        
        # trust  
        "as.numeric(w1_q_trust_government)" ,
        "as.numeric(w1_q_trust_congress)" ,
        "as.numeric(w1_q_trust_supreme_court)" ,
        "as.numeric(w1_q_trust_electoral_authorities)" ,
        "as.numeric(w1_q_trust_news_channels)" ,
        "w1_q_legitimacy" ,
        
        # polarization    
        "w1_affective_pol_bolsonaro" ,
        "w1_affective_pol_lula",
        #"w1_vote_first_recode",
        
        # ideology  
        "w1_ideology_you" ,  
        "w1_q_politics_num" ,   
        
        # social media usage
        "w1_q_whatsapp_num ",
        "as.numeric(w1_q_fake_news)" ,
        "as.numeric(w1_q_whatsapp_purposes_news)" ,
        "as.numeric(w1_q_whatsapp_purposes_pay_bills)" ,
        "as.numeric(w1_q_whatsapp_purposes_family)" ,
        "as.numeric(w1_q_whatsapp_purposes_groups)"
    )
    if(model==TRUE){
        model.expr = as.formula(paste(depvar, " ~ exp +",
                                      paste(add.vars, collapse = "+")))
        
        out = lm_robust(model.expr, data = data)
        return(out)
    }    
    if(int==TRUE){
        model.expr = as.formula(paste(depvar, " ~ exp*", mod,  "+",
                                      paste(add.vars, collapse = "+")))
        
        out = lm_robust(model.expr, data = data)
        return(out)
    }    
    
    model.expr = as.formula(paste(depvar, " ~ exp +",
                                  paste(add.vars, collapse = "+")))
    
    out = lm_robust(model.expr, data = data)
    # sd control
    sd_control <- data %>% 
        filter(exp=="control") %>%
        pull(depvar) %>%
        sd(., na.rm=TRUE)
    
    # tidy
    out <- out %>% 
        tidy() %>%
        filter(term=="exptreatment") %>%
        mutate_at(vars(c(estimate, std.error)), 
                  ~.x/sd_control)
    
    return(out)
}

# Heterogenous effects based on digital literacy ------

# moderator
mod = "dl_zcore" # higher values = higher digital literacy

#### DV:Exposure to false and true news

# false
depvar = "false_news_exp"
false_false_e_itt <- func_ols_base(depvar, mod, d, int=TRUE)
false_false_e_ittc <- func_ols_covariates(depvar, mod, d, int=TRUE)
# true
depvar = "true_news_e"
true_true_e_itt <- func_ols_base(depvar, mod, d, int=TRUE); true_true_e_itt
true_true_e_ittc <- func_ols_covariates(depvar,mod, d, int=TRUE); true_true_e_ittc

##### H2 - DV: Belief Accuracy

# false
depvar = "false_false_sum"
false_false_itt <- func_ols_base(depvar, mod, d, int=TRUE)
false_false_ittc <- func_ols_covariates(depvar, mod, d, int=TRUE)

#true
depvar = "true_true_sum"
true_true_itt <- func_ols_base(depvar, mod, d, int=TRUE)
true_true_ittc <- func_ols_covariates(depvar,mod, d, int=TRUE)

# DV:Polarization

# All
list_politems_items <- list("pol_all_zcore", 
                            "distance_candidates",
                            "affective_pol_diff_all", 
                            "social_pol_agg", 
                            "mean_abs_policy_polarization")

list_pol_itt <- map(list_politems_items, ~ func_ols_base(.x, mod, d, int=TRUE))
list_pol_ittc <- map(list_politems_items, ~ func_ols_covariates(.x, mod, d, int=TRUE))

# only partisans
list_politems_items <- list("pol_partisans_zscore", 
                            "false_polarization", 
                            "affect_pol_diff_partisans",
                            "social_pol_agg", 
                            "mean_policy_polarization_direction")

# get only partisans
list_itt <- map(list_politems_items, ~ func_ols_base(.x, mod, d, int=TRUE))
list_ittc <- map(list_politems_items, ~ func_ols_covariates(.x, mod, d, int=TRUE))


# DV: Subjective WellBeing

# double check the index
list_wb_items <- list("swb_zcore", 
                      "q_well_being_happy_numeric"  ,
                      "q_well_being_depressed_numeric",
                      "q_well_being_anxious_numeric",
                      "q_well_being_isolated_numeric", 
                      "q_well_being_satisfied_numeric" )

# get only partisans
list_swb_itt <- map(list_wb_items, ~ func_ols_base(.x, mod, d, int=TRUE))
list_swb_ittc <- map(list_wb_items, ~ func_ols_covariates(.x, mod, d, int=TRUE))

# plot all together
output = flatten(list(list_itt, list_ittc))

# For regression tables 
# Model with correct interaction terms
new_names = c ("Intercept)"  = "Intercept",  
               "exptreatment" ="Treatment",
               "dl_zcore"  = "Digital Literacy", 
               "exptreatment:dl_zcore" = "Treatment x Digital Literacy")

library(modelsummary)
library(kableExtra)
options("modelsummary_format_numeric_latex" = "plain")

# table - latex version
# modelsummary(list("False Rumors Exposure"=false_false_e_ittc,
#                   "True News Exposure"=true_true_e_ittc,
#                   "False Rumors Accuracy"=false_false_ittc,
#                   "True News Accuracy"=true_true_ittc,
#                   "Polarization Index"=list_pol_ittc[[1]],
#                   "Subjective Well-Being Index"=list_swb_ittc[[1]]),
#              # if a latex issue is raised, just comment out everything below, and run only the modelsummary to see the results
#              output="latex",
#              fmt = 3,
#              stars = TRUE,
#              coef_map = new_names,
#              title = "Regression Models: Heterogenous effects of the Deactivation conditional on Digital Literacy Score ",
#              gof_omit = 'AIC|BIC') %>%
#     footnote(general = " Robust standard errors in Parentheses. All models use the Covariate-Adjusted ITT estimator. ",
#              threeparttable = TRUE)  %>%
#     save_kable(file ="output/sm_tab10.tex")

# if a latex issue is raised in the table above, you can check the results in this version:
modelsummary(list("False Rumors Exposure"=false_false_e_ittc,
                  "True News Exposure"=true_true_e_ittc,
                  "False Rumors Accuracy"=false_false_ittc,
                  "True News Accuracy"=true_true_ittc,
                  "Polarization Index"=list_pol_ittc[[1]],
                  "Subjective Well-Being Index"=list_swb_ittc[[1]]),
             fmt = 3,
             stars = TRUE,
             coef_map = new_names,
             title = "Regression Models: Heterogenous effects of the Deactivation conditional on Digital Literacy Score ",
             gof_omit = 'AIC|BIC')


# Moderator Age - Table 11 -------------------------------------------------------------------------

# remove age as a covariate.
func_ols_covariates_no_age = function(depvar, mod, data, model=FALSE, int=FALSE){
    # For the controls, we will use
    # respondents’ demographics (sex, age, education, income, race), measures of trust in mainstream media
    # and institutions, pre-treatment measures of polarization, ideology and social media usage.
    add.vars = c(           
        #demographics
        "w1_q_gender" ,  
        "q_race" ,  
        "w1_q_education" ,
        "income_num",
       
         # trust  
        "as.numeric(w1_q_trust_government)" ,
        "as.numeric(w1_q_trust_congress)" ,
        "as.numeric(w1_q_trust_supreme_court)" ,
        "as.numeric(w1_q_trust_electoral_authorities)" ,
        "as.numeric(w1_q_trust_globo)" ,
        "as.numeric(w1_q_trust_news_channels)" ,
        "w1_q_legitimacy" ,
        
        # polarization    
        "w1_affective_pol_bolsonaro" ,
        " w1_affective_pol_lula" ,
        
        # ideology  
        "w1_ideology_you" ,  
        "w1_q_politics_num" ,   
        
        # social media usage
        "w1_q_whatsapp_num ",
        "as.numeric(w1_q_fake_news)" ,
        "as.numeric(w1_q_whatsapp_purposes_news)" ,
        "as.numeric(w1_q_whatsapp_purposes_pay_bills)" ,
        "as.numeric(w1_q_whatsapp_purposes_family)" ,
        "as.numeric(w1_q_whatsapp_purposes_groups)"
    )
    
    if(model==TRUE){
        model.expr = as.formula(paste(depvar, " ~ exp +",
                                      paste(add.vars, collapse = "+")))
        
        out = lm_robust(model.expr, data = data)
        return(out)
    }    
    if(int==TRUE){
        model.expr = as.formula(paste(depvar, " ~ exp*", mod,  "+",
                                      paste(add.vars, collapse = "+")))
        
        out = lm_robust(model.expr, se_type = "HC0", data = data)
        return(out)
    }    
    
    model.expr = as.formula(paste(depvar, " ~ exp +",
                                  paste(add.vars, collapse = "+")))
    
    out = lm_robust(model.expr, data = data)
    # sd control
    sd_control <- data %>% 
        filter(exp=="treatment") %>%
        pull(depvar) %>%
        sd(., na.rm=TRUE)
    
    # tidy
    out <- out %>% 
        tidy() %>%
        filter(term=="exptreatment") %>%
        mutate_at(vars(c(estimate, std.error)), 
                  ~.x/sd_control)
    
    return(out)
}

# Moderator
mod = "w1_q_age_num"

#### DV:H1 - Exposure
depvar = "false_news_exp"
false_false_e_itt <- func_ols_base(depvar, mod, d, int=TRUE)
false_false_e_ittc <- func_ols_covariates_no_age(depvar, mod, d, int=TRUE)

depvar = "true_news_e"
true_true_e_itt <- func_ols_base(depvar, mod, d, int=TRUE)
true_true_e_ittc <- func_ols_covariates_no_age(depvar,mod, d, int=TRUE)

##### DV: H2 - Belief accuracy
depvar = "false_false_sum"
false_false_itt <- func_ols_base(depvar, mod, d, int=TRUE)
false_false_ittc <- func_ols_covariates_no_age(depvar, mod, d, int=TRUE)

depvar = "true_true_sum"
true_true_itt <- func_ols_base(depvar, mod, d, int=TRUE)
true_true_ittc <- func_ols_covariates_no_age(depvar,mod, d, int=TRUE)


# DV:Polarization

# All
list_politems_items <- list("pol_all_zcore", 
                            "distance_candidates",
                            "affective_pol_diff_all", 
                            "social_pol_agg", 
                            "mean_abs_policy_polarization")

list_pol_itt <- map(list_politems_items, ~ func_ols_base(.x, mod, d, int=TRUE))
list_pol_ittc <- map(list_politems_items, ~ func_ols_covariates_no_age(.x, mod, d, int=TRUE))


# DV:WellBeing

# double check the index
list_wb_items <- list("swb_zcore", 
                      "q_well_being_happy_numeric"  ,
                      "q_well_being_depressed_numeric",
                      "q_well_being_anxious_numeric",
                      "q_well_being_isolated_numeric", 
                      "q_well_being_satisfied_numeric" )

# models
list_swb_itt <- map(list_wb_items, ~ func_ols_base(.x, mod, d, int=TRUE))
list_swb_ittc <- map(list_wb_items, ~ func_ols_covariates_no_age(.x, mod, d, int=TRUE))

# For regression tables 

# Model with correct interaction terms
new_names = c ("Intercept)"  = "Intercept",  
               "exptreatment" ="Treatment",
               "w1_q_age_num"  = "Age", 
               "exptreatment:w1_q_age_num" = "Treatment x Age")

library(modelsummary)
library(kableExtra)
options("modelsummary_format_numeric_latex" = "plain")

# latex table
# modelsummary(list("False Rumors Exposure"=false_false_e_ittc,
#                   "True News  Exposure"=true_true_e_ittc,
#                   "False Rumors Accuracy"=false_false_ittc,
#                   "True News Accuracy"=true_true_ittc,
#                   "Polarization Index"=list_pol_ittc[[1]],
#                   "Subjective Well-Being Index"=list_swb_ittc[[1]]),
#              output="latex",
#              fmt = 3,
#              stars = TRUE,
#              coef_map = new_names,
#              title = "Regression Models: Heterogenous effects of the Deactivation conditional on Age  ",
#              gof_omit = 'AIC|BIC') %>%
#     footnote(general = " Robust standard errors in Parentheses. All models use the Covariate-Adjusted ITT estimator. ",
#              threeparttable = TRUE)  %>%
#     save_kable(file ="output/sm_tab11.tex")


# if a latex issue is raised in the table above, you can check the results in this version:
modelsummary(list("False Rumors Exposure"=false_false_e_ittc,
                  "True News  Exposure"=true_true_e_ittc,
                  "False Rumors Accuracy"=false_false_ittc,
                  "True News Accuracy"=true_true_ittc,
                  "Polarization Index"=list_pol_ittc[[1]],
                  "Subjective Well-Being Index"=list_swb_ittc[[1]]),
             fmt = 3,
             stars = TRUE,
             coef_map = new_names,
             title = "Regression Models: Heterogenous effects of the Deactivation conditional on Age  ",
             gof_omit = 'AIC|BIC')
   


#  Moderator WhatsApp usage for politics - Table 12 -------------------------------------------------------------------------

# convert to numeric
d$whats_pol_n <- as.numeric(fct_rev(d$w1_q_whatsapp_purposes_politics))

# consider don't know as NA
d$whats_pol_n <- ifelse(d$whats_pol_n==1, NA,d$whats_pol_n-2)

# moderator
mod = "whats_pol_n" # higher values mean use more whatsapp for politics

#### DV:H1 - Exposure
depvar = "false_news_exp"
false_false_e_itt <- func_ols_base(depvar, mod, d, int=TRUE)
false_false_e_ittc <- func_ols_covariates(depvar, mod, d, int=TRUE)

depvar = "true_news_e"
true_true_e_itt <- func_ols_base(depvar, mod, d, int=TRUE)
true_true_e_ittc <- func_ols_covariates_no_age(depvar,mod, d, int=TRUE)

##### DV: H2 - Belief accuracy
depvar = "false_false_sum"
false_false_itt <- func_ols_base(depvar, mod, d, int=TRUE)
false_false_ittc <- func_ols_covariates(depvar, mod, d, int=TRUE)

depvar = "true_true_sum"
true_true_itt <- func_ols_base(depvar, mod, d, int=TRUE)
true_true_ittc <- func_ols_covariates_no_age(depvar,mod, d, int=TRUE)


# DV:Polarization

# All
list_politems_items <- list("pol_all_zcore", 
                            "distance_candidates",
                            "affective_pol_diff_all", 
                            "social_pol_agg", 
                            "mean_abs_policy_polarization")

list_pol_itt <- map(list_politems_items, ~ func_ols_base(.x, mod, d, int=TRUE))
list_pol_ittc <- map(list_politems_items, ~ func_ols_covariates_no_age(.x, mod, d, int=TRUE))
list_pol_itt
list_pol_ittc


# DV WellBeing

# double check the index
list_wb_items <- list("swb_zcore", 
                      "q_well_being_happy_numeric"  ,
                      "q_well_being_depressed_numeric",
                      "q_well_being_anxious_numeric",
                      "q_well_being_isolated_numeric", 
                      "q_well_being_satisfied_numeric" )

# get only partisans
list_swb_itt <- map(list_wb_items, ~ func_ols_base(.x, mod, d, int=TRUE))
list_swb_ittc <- map(list_wb_items, ~ func_ols_covariates_no_age(.x, mod, d, int=TRUE))
list_swb_ittc

# For regression tables ---------------------------------------------------

# Model with correct interaction terms
new_names = c ("Intercept)"  = "Intercept",  
               "exptreatment" ="Treatment",
               "whats_pol_n"  = "WhatsApp usage for Politics", 
               "exptreatment:whats_pol_n" = "Treatment x WhatsApp usage for Politics")

# latex table
# modelsummary(list("False Rumors Exposure"=false_false_e_ittc,
#                   "True News Exposure"=true_true_e_ittc,
#                   "False Rumors Accuracy"=false_false_ittc,
#                   "True News Accuracy"=true_true_ittc,
#                   "Polarization Index"=list_pol_ittc[[1]],
#                   "Subjective Well-Being Index"=list_swb_ittc[[1]]),
#              output="latex",
#              fmt = 3,
#              stars = TRUE,
#              coef_map = new_names,
#              title = "Regression Models: Heterogenous effects of the Deactivation conditional on WhatsApp Usage for Politics ",
#              gof_omit = 'AIC|BIC') %>%
#     footnote(general = " Robust standard errors in Parentheses. All models use the Covariate-Adjusted ITT estimator. ",
#              threeparttable = TRUE)  %>%
#     save_kable(file = paste0("output/sm_tab12.tex"))

# if a latex issue is raised in the table above, you can check the results in this version:

modelsummary(list("False Rumors Exposure"=false_false_e_ittc,
                  "True News Exposure"=true_true_e_ittc,
                  "False Rumors Accuracy"=false_false_ittc,
                  "True News Accuracy"=true_true_ittc,
                  "Polarization Index"=list_pol_ittc[[1]],
                  "Subjective Well-Being Index"=list_swb_ittc[[1]]),
             fmt = 3,
             stars = TRUE,
             coef_map = new_names,
             title = "Regression Models: Heterogenous effects of the Deactivation conditional on WhatsApp Usage for Politics ",
             gof_omit = 'AIC|BIC') 